# coding: utf-8

import urllib
import os
import shutil
from bs4 import BeautifulSoup
import json
import re
def down_all_imgs(soup, dir):
    x = 1
    for i in soup.find_all("img"):
        tmp = i.get("src")
        imgurl = tmp.replace('\\\"', '')

        nPos = imgurl.rindex(".")
        extStr = imgurl[nPos:]

        print imgurl

        urllib.urlretrieve(imgurl, "./%s/%s%s" % (dir, x, extStr))
        x += 1
    return


def descUrl(html):
    """Extract taobao description-page URLs embedded in the page's inline
    javascript (the "descUrl ... location.protocol==='http:' ? '//...' : ..."
    snippet).

    Returns a list of protocol-less host/path strings (leading "//" removed);
    empty list when the page contains no such snippet.
    """
    # dots are escaped so "location.protocol" matches literally and cannot
    # match arbitrary characters (e.g. "locationXprotocol")
    reg = r"descUrl.*?location\.protocol==='http:' \? '//(.*?)'.?:"
    desurlre = re.compile(reg, re.I)
    return desurlre.findall(html)

def getimg(doorName, url):
    res = urllib.urlopen(url)
    soup =BeautifulSoup(res, "html.parser")

    # 创建目录
    dir = "%s__%s" % (doorName, soup.title.string)
    if os.path.exists(dir):
        shutil.rmtree(dir)

    os.makedirs(dir)
    print u"开始下载：%s" % dir

    # 1688
    # mod-detail-description 形式的
    detail = soup.find("div", id="mod-detail-description")
    if not detail is None and len(detail) > 0:
        js = json.loads(detail.get("data-mod-config"), encoding="utf-8")
        url = js['catalog'][0]['contentUrl']
        print url

        res.close()
        res = urllib.urlopen(url)
        soup =BeautifulSoup(res, "html.parser")

        down_all_imgs(soup, dir)
        return


    # 1688
    # 获取所有图片
    div = soup.find_all("div", class_="content fd-editor")
    if not div is None and len(div) > 0:
        div_soup = BeautifulSoup(unicode(div), "html.parser")
        down_all_imgs(div_soup, dir)
        return


    # x = 1
    # for img in div_soup.find_all("img"):
    #     imgurl = img.attrs["src"]
    #     nPos = imgurl.rindex(".")
    #     extStr = imgurl[nPos:]
    #     print img.attrs["src"]
    #     urllib.urlretrieve(imgurl, "./%s/%s%s" % (dir, x, extStr))
    #     x += 1




    # taobao
    res.close()
    res = urllib.urlopen(url)
    url = descUrl(res.read())[0]
    if not url is None and len(url) > 0:
        res.close()
        res = urllib.urlopen('http://'+ url)
        soup =BeautifulSoup(res, "html.parser")

        down_all_imgs(soup, dir)

    res.close()


if __name__ == "__main__":
    # ./urls holds one "<name> <url>" pair per line
    with open('./urls', 'r') as file_object:
        for line in file_object:
            # split() (not split(" ")) also strips the trailing newline,
            # which the original left attached to the URL
            values = line.decode("utf-8").split()
            if len(values) < 2:
                continue  # skip blank or malformed lines instead of crashing
            getimg(values[0], values[1])
